1 Introduction:

This script uses the output of “r_l_preparation.Rmd” and returns all values reported in the paper.

2 Setup:

Load packages:

library(tidyverse) # data processing
library(brms) # bayesian models
#library(cmdstanr) # install it with: install.packages("cmdstanr", repos = c("https://mc-stan.org/r-packages/", getOption("repos"))) followed by install_cmdstan()
library(ggdist) # for plotting
# option for Bayesian regression models:
# use all available cores for parallel computing
options(mc.cores = parallel::detectCores())

## Set the script's path as working directory
#parentfolder = rstudioapi::getActiveDocumentContext()$path 
#setwd(dirname(parentfolder))
#parentfolder <- getwd()
parentfolder <- "."; # assume current folder is the document folder

models        <- paste0(parentfolder, '/models/')
plots         <- paste0(parentfolder, '/plots/')
data          <- paste0(parentfolder, '/data/')
# Various auxiliary functions:
library(parallel);
library(lme4);
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Attaching package: 'lme4'
## The following object is masked from 'package:brms':
## 
##     ngrps
library(performance);
library(brms);
library(bayestestR);
## 
## Attaching package: 'bayestestR'
## The following object is masked from 'package:ggdist':
## 
##     hdi
library(ggplot2);
library(gridExtra);
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(ggpubr);
library(sjPlot);
## Learn more about sjPlot with 'browseVignettes("sjPlot")'.
brms_ncores  <- max(detectCores(all.tests=TRUE, logical=FALSE), 4, na.rm=TRUE); # try to use multiple cores, if present

# Verbal interpretation of Bayes factors (BF), following the usual Jeffreys-style
# thresholds (anecdotal < 3 < moderate < 10 < strong < 30 < very strong < 100 < extreme).
#
# BF: the Bayes factor of model1 against model2 (BF > 1 favors model1, BF < 1 favors model2).
# model1_name, model2_name: user-friendly model names used in the returned sentence.
# Returns a single character string, e.g. "strong evidence for m1 against m2 (BF=12.3)".
BF_interpretation <- function(BF, model1_name="m1", model2_name="m2")
{
  # Shared formatter: "<strength> evidence for <a> against <b> (BF=...)":
  .msg <- function(strength, a, b) paste0(strength, " evidence for ", a, " against ", b, " (BF=", sprintf("%.3g", BF), ")");

  if( BF > 100 )   return (.msg("extreme",     model1_name, model2_name));
  if( BF > 30 )    return (.msg("very strong", model1_name, model2_name));
  if( BF > 10 )    return (.msg("strong",      model1_name, model2_name));
  if( BF > 3 )     return (.msg("moderate",    model1_name, model2_name));
  if( BF > 1 )     return (.msg("anecdotal",   model1_name, model2_name));
  # BF == 1: no evidence either way (different sentence shape, kept verbatim):
  if( BF == 1 )    return (paste0("no evidence for ", model1_name, " nor ", model2_name, " (BF=", sprintf("%.3g", BF), ")"));
  # BF < 1: the evidence favors model2, with mirrored thresholds (~1/3, 1/10, 1/30, 1/100):
  if( BF > 0.33 )  return (.msg("anecdotal",   model2_name, model1_name));
  if( BF > 0.10 )  return (.msg("moderate",    model2_name, model1_name));
  if( BF > 0.033 ) return (.msg("strong",      model2_name, model1_name));
  if( BF > 0.010 ) return (.msg("very strong", model2_name, model1_name));
  return (.msg("extreme", model2_name, model1_name));
}

# Here I hack brms' kfold code to make it run in parallel using good old mclapply instead of futures;
# this avoids random crashes which seem to be due to future, but works only on *NIX (which, for me here, is not an issue).
# Adapted the code from https://github.com/paul-buerkner/brms/blob/master/R/loo.R and https://github.com/paul-buerkner/brms/blob/master/R/kfold.R
#
# add_criterion_kfold_parallel(model, K, chains): returns `model` with a K-fold
# cross-validation criterion stored in model$criteria$kfold — same contract as
# brms::add_criterion(model, criterion="kfold", ...).
if( Sys.info()['sysname'] == "Windows" )
{
  # On Windows, fall back to the standard implementation in brms
  # (mclapply cannot fork processes on Windows):
  add_criterion_kfold_parallel <- function(model, K=10, chains=1)
  {
    return (add_criterion(model, criterion="kfold", K=K, chains=chains));
  }
} else
{
  # On anything else (i.e., *NIX), refit the K folds in parallel with mclapply:
  add_criterion_kfold_parallel <- function(model, K=10, chains=1)
  {
    # Bring older fitted brms models up to the current object structure:
    model <- restructure(model);
  
    mf <- model.frame(model); 
    # Drop attributes that would interfere when refitting update() on subsets:
    attributes(mf)[c("terms", "brmsframe")] <- NULL;
    N <- nrow(mf);
    
    if( K > N ) return (model); # does not work in this case (more folds than observations): return the model unchanged
    
    # Random assignment of the N observations to K folds (the only fold type supported here):
    fold_type <- "random"; folds <- loo::kfold_split_random(K, N);
    Ksub <- seq_len(K);
  
    # For each fold: refit the model without that fold, then compute the
    # log-likelihood of the held-out observations under the refitted model:
    kfold_results <- mclapply(Ksub, function(k) 
    {
      omitted <- predicted <- which(folds == k);
      mf_omitted <- mf[-omitted, , drop=FALSE];
      
      # brms renamed its internals across versions, so probe the namespace:
      if( exists("subset_data2", envir=asNamespace("brms")) )
      {
        # Newer versions of brms:
        model_updated <- base::suppressWarnings(update(model, newdata=mf_omitted, data2=brms:::subset_data2(model$data2, -omitted), refresh=0, chains=chains));
        
        lppds <- log_lik(model_updated, newdata=mf[predicted, , drop=FALSE], newdata2=brms:::subset_data2(model$data2, predicted), 
                         allow_new_levels=TRUE, resp=NULL, combine=TRUE, chains=chains);
      } else if( exists("subset_autocor", envir=asNamespace("brms")) )
      {
        # Older versions of brms:
        model2 <- brms:::subset_autocor(model, -omitted, incl_car=TRUE);
        model_updated <- base::suppressWarnings(update(model2, newdata=mf_omitted, refresh=0, chains=chains));
        
        lppds <- log_lik(model_updated, newdata=mf[predicted, , drop=FALSE], allow_new_levels=TRUE, resp=NULL, combine=TRUE, chains=chains);
      } else
      {
        stop("Unknown version of brms!");
      }
  
      return (list("obs_order"=predicted, "lppds"=lppds));
    }, mc.cores=ifelse(exists("brms_ncores"), brms_ncores, detectCores()));
    
    # Put them back in the form expected by the following unmodified brms code:
    obs_order <- lapply(kfold_results, function(x) x$obs_order);
    lppds     <- lapply(kfold_results, function(x) x$lppds);
    
    # Pointwise expected log predictive density (elpd) per held-out observation:
    elpds <- brms:::ulapply(lppds, function(x) apply(x, 2, brms:::log_mean_exp))
    # make sure elpds are put back in the right order
    elpds <- elpds[order(unlist(obs_order))]
    elpd_kfold <- sum(elpds)
    se_elpd_kfold <- sqrt(length(elpds) * var(elpds))
    rnames <- c("elpd_kfold", "p_kfold", "kfoldic")
    cnames <- c("Estimate", "SE")
    estimates <- matrix(nrow = 3, ncol = 2, dimnames = list(rnames, cnames))
    estimates[1, ] <- c(elpd_kfold, se_elpd_kfold)
    # NOTE(review): row 2 ("p_kfold") is deliberately left NA here, as it is not computed.
    estimates[3, ] <- c(-2 * elpd_kfold, 2 * se_elpd_kfold)
    out <- brms:::nlist(estimates, pointwise = cbind(elpd_kfold = elpds))
    atts <- brms:::nlist(K, Ksub, NULL, folds, fold_type)
    attributes(out)[names(atts)] <- atts
    out <- structure(out, class = c("kfold", "loo"))
    
    # Make the result look like a criterion added by brms::add_criterion():
    attr(out, "yhash") <- brms:::hash_response(model, newdata=NULL, resp=NULL);
    attr(out, "model_name") <- "";
    
    # Cache the criterion on the model and return it:
    model$criteria$kfold <- out;
    model;
  }
}

# Compute (and cache on the model object) Bayesian fit indices for a given brms model.
#
# model: a brmsfit object.
# indices: which criteria to compute; any subset of c("bayes_R2", "loo", "waic", "kfold").
#          Criteria NOT listed are removed from model$criteria if already cached there.
# K: number of folds for the k-fold cross-validation.
# verbose: print progress messages?
# do.parallel: use the parallel (mclapply-based) add_criterion_kfold_parallel() for k-fold?
# Returns the model with the requested criteria stored in model$criteria.
brms_fit_indices <- function(model, indices=c("bayes_R2", "loo", "waic", "kfold"), K=10, verbose=TRUE, do.parallel=TRUE)
{
  # Remove a cached criterion from the model (if present):
  drop_criterion <- function(model, criterion)
  {
    if( !is.null(model$criteria) && criterion %in% names(model$criteria) ) model$criteria[[criterion]] <- NULL;
    return (model);
  }
  
  # The "simple" criteria all follow the same add-or-drop pattern:
  progress_labels <- c("bayes_R2"="R^2...\n", "loo"="LOO...\n", "waic"="WAIC...\n");
  for( criterion in names(progress_labels) )
  {
    if( criterion %in% indices )
    {
      if( verbose ) cat(progress_labels[[criterion]]);
      model <- add_criterion(model, criterion);
    } else
    {
      model <- drop_criterion(model, criterion);
    }
  }
  
  # k-fold needs special treatment: it refits the model K times and may fail,
  # so we compute it into a temporary and only keep it on success:
  if( "kfold" %in% indices )
  {
    if( verbose ) cat(paste0("KFOLD (K=",K,")...\n"));
    model1 <- NULL;
    if( !do.parallel )
    {
      try(model1 <- add_criterion(model, "kfold", K=K, chains=1), silent=TRUE);
    } else
    {
      try(model1 <- add_criterion_kfold_parallel(model, K=K, chains=1), silent=TRUE);
    }
    if( !is.null(model1) )
    {
      model <- model1;
    } else
    {
      # The k-fold computation failed: make sure no stale criterion is left behind:
      model <- drop_criterion(model, "kfold");
    }
  } else
  {
    model <- drop_criterion(model, "kfold");
  }

  # The refits can use a lot of memory; reclaim it before returning:
  gc();
  
  return (model);
}

# Bayesian model comparison between two fitted brms models.
#
# Uses whichever criteria were already cached on the models (e.g. by
# brms_fit_indices()): bayes_R2, loo, waic, kfold; a missing criterion yields NA.
# model1, model2: brmsfit objects.
# name1, name2: user-friendly names (NA falls back to "model1"/"model2").
# bayes_factor: also compute the Bayes factor (expensive)?
# print_results: print a human-readable summary to the console?
# Returns a list with the R^2 difference, BF (+ verbal interpretation),
# LOO/WAIC/KFOLD comparisons, and the WAIC-based model weights.
brms_compare_models <- function(model1, model2, name1=NA, name2=NA, bayes_factor=TRUE, print_results=TRUE)
{
  # Resolve the user-friendly display names once:
  n1 <- if( !is.na(name1) ) name1 else "model1";
  n2 <- if( !is.na(name2) ) name2 else "model2";
  
  # TRUE if the given criterion has been computed and cached on the model:
  has_criterion <- function(model, criterion)
  {
    return (!is.null(model$criteria) && criterion %in% names(model$criteria) && !is.null(model$criteria[[criterion]]));
  }
  
  # Difference in posterior mean Bayesian R^2 (model1 - model2):
  if( has_criterion(model1, "bayes_R2") && has_criterion(model2, "bayes_R2") )
  {
    R2_1_2 <- (mean(model1$criteria$bayes_R2) - mean(model2$criteria$bayes_R2));
  } else
  {
    R2_1_2 <- NA;
  }
  
  if( bayes_factor )
  {
    # brms::bayes_factor() is chatty; capture.output() keeps the console clean:
    invisible(capture.output(bf_1_2 <- brms::bayes_factor(model1, model2)$bf));
    bf_interpret_1_2 <- BF_interpretation(bf_1_2, n1, n2); 
  }
  else
  {
    bf_1_2 <- NULL; bf_interpret_1_2 <- NA;
  }
  
  if( has_criterion(model1, "loo") && has_criterion(model2, "loo") )
  {
    loo_1_2 <- loo_compare(model1, model2, criterion="loo", model_names=c(n1, n2));
  } else
  {
    loo_1_2 <- NA;
  }
  
  if( has_criterion(model1, "waic") && has_criterion(model2, "waic") )
  {
    waic_1_2 <- loo_compare(model1, model2, criterion="waic", model_names=c(n1, n2));
    mw_1_2 <- model_weights(model1, model2, weights="waic", model_names=c(n1, n2));
  } else
  {
    waic_1_2 <- NA; 
    mw_1_2 <- NA;
  }
  
  if( has_criterion(model1, "kfold") && has_criterion(model2, "kfold") )
  {
    kfold_1_2 <- loo_compare(model1, model2, criterion="kfold", model_names=c(n1, n2));
  } else
  {
    kfold_1_2 <- NA;
  }
  
  if( print_results )
  {
    cat(paste0("\nComparing models '",n1,"' and '",n2,"':\n\n"));
    cat(paste0("\ndelta R^2 = ",sprintf("%.1f%%",100*R2_1_2),"\n\n"));
    cat(bf_interpret_1_2,"\n\n");
    cat("\nLOO:\n"); print(loo_1_2);
    cat("\nWAIC:\n"); print(waic_1_2);
    cat("\nKFOLD:\n"); print(kfold_1_2);
    cat("\nModel weights (WAIC):\n"); print(mw_1_2);
    cat("\n");
  }
  
  # The cached criteria can be large; reclaim memory before returning:
  gc();
  
  return (list("R2"=R2_1_2, "BF"=bf_1_2, "BF_interpretation"=bf_interpret_1_2, "LOO"=loo_1_2, "WAIC"=waic_1_2, "KFOLD"=kfold_1_2, "model_weights_WAIC"=mw_1_2));
}

# Format a model comparison as a one-line human-readable string.
# a: an anova() table comparing two (frequentist) models, or NULL.
# b: the list returned by brms_compare_models() (Bayesian comparison), or NULL.
#    Exactly one of `a` and `b` should be non-NULL.
# a.names: named character vector mapping the anova's internal model names
#          (its rownames) to user-friendly names.
# Returns the formatted string; the comparison symbol is ">" / "≈" (anova) or
# "≈" / ">" / ">>" (Bayesian) depending on the size of the difference relative
# to its standard error (for LOO/WAIC/KFOLD) or p-value/ΔAIC (for the anova).
.print.model.comparison <- function(a=NULL, a.names=NULL, b=NULL) # a is the anova and b is the Bayesian comparison (only one can be non-NULL), a.names are the mappings between the inner and user-friendly model names
{
  # ANOVA:
  if( !is.null(a) )
  {
    a <- as.data.frame(a);
    if( !is.null(a.names) )
    {
      # Replace the internal model names by the user-friendly ones:
      if( length(a.names) != nrow(a) || !all(names(a.names) %in% rownames(a)) )
      {
        stop("a.names do not correspond the anova model names!");
        return (NULL); # NOTE(review): unreachable — stop() above already aborts
      }
      rownames(a) <- a.names[rownames(a)];
    }
    # NOTE(review): the code below assumes the anova has exactly 2 rows (models).
    i <- which.min(a$AIC); # index of the better (lower-AIC) model
    # "better > worse" if significant (p < 0.05) or ΔAIC > 3, otherwise "≈":
    s.a <- sprintf("%s %s %s: ΔAIC=%.1f, ΔBIC=%.1f", 
                   rownames(a)[i], 
                   ifelse((!is.na(a[2,"Pr(>Chisq)"]) && a[2,"Pr(>Chisq)"] <0.05) || (abs(a$AIC[1] - a$AIC[2]) > 3), ">", "≈"),
                   rownames(a)[3-i],
                   abs(a$AIC[1] - a$AIC[2]), 
                   abs(a$BIC[1] - a$BIC[2]));
    if( !is.na(a[2,"Pr(>Chisq)"]) )
    {
      # Append the likelihood-ratio test p-value (Markdown scientific notation):
      s.a <- paste0(s.a,
                    sprintf(", *p*=%s", scinot(a[2,"Pr(>Chisq)"])));
    }
    
    # return value:
    return (s.a);
  }
  
  # Bayesian comparison:
  if( !is.null(b) )
  {
    # For each criterion: [1,1]/[2,1] are the elpd estimates of the better/worse
    # model (loo_compare sorts best-first) and [2,2] is the SE of the difference.
    # "≈" if Δelpd < 4 or < 1 SE, ">" if < 2 SE, ">>" otherwise.
    # KFOLD may be missing (NA/NULL), in which case "?"/NA placeholders are used.
    s.b <- sprintf("BF: %s, ΔLOO(%s %s %s)=%.1f (%.1f), ΔWAIC(%s %s %s)=%.1f (%.1f), ΔKFOLD(%s %s %s)=%.1f (%.1f)",
                   # BF:
                   b$BF_interpretation, 
                   # LOO:
                   rownames(b$LOO)[1],
                   ifelse(abs(b$LOO[1,1]-b$LOO[2,1]) < 4 || abs(b$LOO[1,1]-b$LOO[2,1]) < b$LOO[2,2], "≈", ifelse(abs(b$LOO[1,1]-b$LOO[2,1]) < 2*b$LOO[2,2], ">", ">>")),
                   rownames(b$LOO)[2], 
                   abs(b$LOO[1,1]-b$LOO[2,1]), b$LOO[2,2], 
                   # WAIC:
                   rownames(b$WAIC)[1], 
                   ifelse(abs(b$WAIC[1,1]-b$WAIC[2,1]) < 4 || abs(b$WAIC[1,1]-b$WAIC[2,1]) < b$WAIC[2,2], "≈", ifelse(abs(b$WAIC[1,1]-b$WAIC[2,1]) < 2*b$WAIC[2,2], ">", ">>")), 
                   rownames(b$WAIC)[2], 
                   abs(b$WAIC[1,1]-b$WAIC[2,1]), b$WAIC[2,2],
                   # KFOLD:
                   ifelse(is.null(b$KFOLD) || all(is.na(b$KFOLD)), "?", rownames(b$KFOLD)[1]), 
                   ifelse(is.null(b$KFOLD) || all(is.na(b$KFOLD)), "?", ifelse(abs(b$KFOLD[1,1]-b$KFOLD[2,1]) < 4 || abs(b$KFOLD[1,1]-b$KFOLD[2,1]) < b$KFOLD[2,2], "≈", ifelse(abs(b$KFOLD[1,1]-b$KFOLD[2,1]) < 2*b$KFOLD[2,2], ">", ">>"))), 
                   ifelse(is.null(b$KFOLD) || all(is.na(b$KFOLD)), "?", rownames(b$KFOLD)[2]), 
                   ifelse(is.null(b$KFOLD) || all(is.na(b$KFOLD)), NA, abs(b$KFOLD[1,1]-b$KFOLD[2,1])), ifelse(is.null(b$KFOLD) || all(is.na(b$KFOLD)), NA, b$KFOLD[2,2]));
    
    # return value:
    return (s.b);
  }
}

# Standard error of the mean (SEM) of a numeric vector x:
std <- function(x) sqrt(var(x) / length(x))

# Root Mean Square Error (RMSE) between observed values y and the predictions
# in yrep (a draws-by-observations matrix; the prediction per observation is
# the column mean, i.e. the posterior mean prediction):
rmse <- function(y, yrep)
{
  prediction <- colMeans(yrep)
  residual   <- prediction - y
  sqrt(mean(residual^2))
}

# Log odds to probability (inverse logit, as in logistic regression):
# p = odds / (1 + odds), where odds = exp(x).
lo2p <- function(x)
{
  odds <- exp(x)
  odds / (1 + odds)
}

# Scientific notation using Markdown conventions (inspired from https://www.r-bloggers.com/2015/03/scientific-notation-for-rlatex/):
# each value is rendered either as a plain rounded number, or — when pvalue=TRUE
# and the value is smaller than 10^-3 in magnitude — as "m×10^e^" (Markdown superscript).
#
# xs: numeric vector to format; digits: significant digits of the mantissa;
# pvalue: use the ×10^e^ form for very small magnitudes (as typical for p-values)?
# Returns a character vector of the same length as xs.
scinot <- function(xs, digits=2, pvalue=TRUE)
{
  # Format a single value:
  scinot1 <- function(x)
  {
    if( is.na(x) ) return (NA_character_); # propagate missing values
    # x == 0 must be special-cased: log10(0) is -Inf, which made the original
    # implementation return "NaN×10^-Inf^":
    if( x == 0 ) return ("0");
    sign <- "";
    if(x < 0)
    {
      sign <- "-";
      x <- -x;
    }
    exponent <- floor(log10(x));
    if(pvalue && exponent < -3) 
    {
      # Small magnitude: mantissa × 10^exponent (Markdown superscript markup):
      xx <- round(x / 10^exponent, digits=digits);
      e <- paste0("×10^", round(exponent,0), "^");
    } else 
    {
      # Plain rounded number:
      xx <- round(x, digits=digits+1);
      e <- "";
    }
    paste0(sign, xx, e);
  }
  vapply(xs, scinot1, character(1));
}

# Escape every literal '*' in a string so Markdown does not treat it as emphasis:
escstar <- function(s)
{
  gsub("\\*", "\\\\*", s); # regex form, equivalent to gsub("*", "\\*", s, fixed=TRUE)
}

For reproducible reporting:

packageVersion('tidyverse')
## [1] '2.0.0'
packageVersion('ggplot2')
## [1] '3.4.4'
packageVersion('brms')
## [1] '2.20.4'
#packageVersion('cmdstanr')
packageVersion('ggdist')
## [1] '3.3.1'

Load ggplot2 theme and colors:

source('theme_timo.R')

colorBlindBlack8  <- c("#000000", "#E69F00", "#56B4E9", "#009E73", 
                       "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

Load data:

web     <- read_csv(paste0(data, 'web_experiment_cleaned.csv'))
web_raw <- read_csv(paste0(data, '/web_raw_trials.csv'))

field     <- read_csv(paste0(data, 'field_experiment_cleaned.csv'))
field_raw <- read_csv(paste0(data, '/field_raw_trials.csv'))

3 Online experiment

3.1 Descriptive statistics

First, how many participants?

nrow(web)
## [1] 903

Sex division

table(web$Sex)
## 
##   F   M 
## 681 222

Ages

summary(web$Age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.00   23.00   29.00   32.92   40.00   84.00

Order division

# Counts
table(web$Order)
## 
## l_first r_first 
##     436     467
# Percentage
prop.table(table(web$Order)) * 100
## 
## l_first r_first 
## 48.2835 51.7165

First, how many languages?

web %>% count(Name) %>% nrow()
## [1] 25

Does this number correspond with the L1s?

web %>% count(Language) %>% nrow()
## [1] 25

How many families?

web %>% count(Family) %>% nrow()
## [1] 9

How many have the R/L distinction in the L1 among the languages?

web %>% count(Language, r_l_distinction_L1) %>% count(r_l_distinction_L1)

How many really use the alveolar trill in L1 among the languages?

web %>% count(Language, trill_real_L1) %>% count(trill_real_L1)

How many really have the alveolar trill in L1 as an allophone among the languages?

web %>% count(Language, trill_occ_L1) %>% count(trill_occ_L1)

What about the same questions for L2. But this will not neatly sum up to 25, due to various possible scenarios for L2 within a specific L1.

How many have the R/L distinction in the L2 among the languages?

web %>% count(Language, r_l_distinction_L2) %>% count(r_l_distinction_L2)

How many really use the alveolar trill in L2 among the languages?

web %>% count(Language, trill_real_L2) %>% count(trill_real_L2)

How many really have the alveolar trill in L2 as an allophone among the languages?

web %>% count(Language, trill_occ_L2) %>% count(trill_occ_L2)

What is the grand average congruent behavior?

mean(web$Match)
## [1] 0.8726467

87.3%

What about only among those who have L1 without the distinction?

web %>%
  filter(r_l_distinction_L1 == "0") %>%
  summarize(mean_match = mean(Match, na.rm = TRUE))

83.9%

What about only among those who have L1 without the distinction and no L2 that distinguishes?

web %>%
  filter(r_l_distinction_L1 == "0") %>%
  filter(!EnglishL2YesNo) %>% 
  filter(r_l_distinction_L1 == '0') %>% 
  summarize(mean_match = mean(Match, na.rm = TRUE))

85.1%

Compute average matching behavior per language and sort:

web_avg <- web %>%
  group_by(Language) %>% 
  summarize(M = mean(Match)) %>% 
  arrange(desc(M)) %>% 
  mutate(percent = round(M, 2) * 100,
         percent = str_c(percent, '%'))

# Show:

web_avg %>% print(n = Inf)
## # A tibble: 25 × 3
##    Language     M percent
##    <chr>    <dbl> <chr>  
##  1 FI       1     100%   
##  2 FR       0.982 98%    
##  3 EN       0.974 97%    
##  4 DE       0.953 95%    
##  5 SE       0.952 95%    
##  6 DK       0.944 94%    
##  7 HU       0.941 94%    
##  8 JP       0.927 93%    
##  9 KR       0.909 91%    
## 10 GR       0.905 90%    
## 11 PL       0.887 89%    
## 12 RU       0.872 87%    
## 13 EE       0.860 86%    
## 14 FA       0.857 86%    
## 15 AM       0.85  85%    
## 16 ZU       0.85  85%    
## 17 IT       0.846 85%    
## 18 TR       0.811 81%    
## 19 ES       0.806 81%    
## 20 GE       0.8   80%    
## 21 TH       0.8   80%    
## 22 PT       0.770 77%    
## 23 RO       0.742 74%    
## 24 AL       0.7   70%    
## 25 CN       0.696 70%

Check some demographics, also to report in the paper. First, the number of participants per language:

web %>% 
  count(Name, sort = TRUE) %>% print(n = Inf)
## # A tibble: 25 × 2
##    Name           n
##    <chr>      <int>
##  1 German        85
##  2 Portuguese    61
##  3 French        57
##  4 Japanese      55
##  5 Polish        53
##  6 Italian       52
##  7 Russian       47
##  8 Chinese       46
##  9 Estonian      43
## 10 Greek         42
## 11 English       39
## 12 Turkish       37
## 13 Spanish       36
## 14 Hungarian     34
## 15 Romanian      31
## 16 Korean        22
## 17 Farsi         21
## 18 Swedish       21
## 19 Armenian      20
## 20 Thai          20
## 21 Zulu          20
## 22 Danish        18
## 23 Finnish       18
## 24 Georgian      15
## 25 Albanian      10

Then, the number of L1 speakers who have R/L distinction vs. who don’t:

web %>% count(r_l_distinction_L1) %>%
  mutate(prop = n / sum(n),
         percent = round(prop, 2) * 100,
         percent = str_c(percent, '%'))

How many people do not have any L2?

sum(is.na(web$L2)) / nrow(web)
## [1] 0.1351052
# bilinguals:
1 - sum(is.na(web$L2)) / nrow(web)
## [1] 0.8648948

Check how many people knew English as their L2:

web %>% count(EnglishL2YesNo) %>% 
  mutate(prop = n / sum(n),
         percent = round(prop, 2) * 100,
         percent = str_c(percent, '%'))

Of those that don’t use a R/L distinction in their L1, how many use R/L distinction in their L2? (double-check if logic alright!)

web %>%
  filter(r_l_distinction_L1 == 0) %>% 
  count(r_l_distinction_L2 == 1) %>% 
  mutate(prop = n / sum(n),
         percent = round(prop, 2) * 100,
         percent = str_c(percent, '%'))

How many “pure” speakers were there?, i.e., those people that 1) don’t know English, 2) don’t use an L1 with a R/L distinction, and 3) don’t know an L2 that distinguishes R/L.

web %>% 
  filter(r_l_distinction_L1 == 0) %>%  # excludes English as well
  filter(!EnglishL2YesNo) %>% 
  filter(r_l_distinction_L2 == 0) %>% 
  nrow()
## [1] 1

1 person!

Let’s check if this is correct. This gives the list of all participants for whom this applies.

web %>% 
    filter(r_l_distinction_L1 == 0 & !EnglishL2YesNo & r_l_distinction_L2 == 0) %>% 
    print(n = 50)
## # A tibble: 1 × 18
##   ID         Match Language Sex     Age Name    Script Family Autotyp_Area L2   
##   <chr>      <dbl> <chr>    <chr> <dbl> <chr>   <chr>  <chr>  <chr>        <chr>
## 1 JP_2040343     1 JP       F        48 Japane… diffe… Japan… N Coast Asia chin…
## # ℹ 8 more variables: EnglishL2YesNo <lgl>, Order <chr>,
## #   r_l_distinction_L1 <dbl>, trill_real_L1 <dbl>, trill_occ_L1 <dbl>,
## #   r_l_distinction_L2 <dbl>, trill_real_L2 <dbl>, trill_occ_L2 <dbl>

Are these really “pure”? What languages do they speak?

web %>% 
  filter(r_l_distinction_L1 == 0) %>%  # excludes English as well
  filter(!EnglishL2YesNo) %>% 
  filter(r_l_distinction_L2 == 0) %>% 
  count(Language)

One Japanese speaker.

Nevertheless, let’s explore whether these also show matches?

web %>% 
  filter(r_l_distinction_L1 == 0) %>%  # excludes English as well
  filter(!EnglishL2YesNo) %>% 
  filter(r_l_distinction_L2 == 0) %>% 
  count(Match) %>% 
  mutate(prop = n / sum(n),
         percent = round(prop, 2) * 100,
         percent = str_c(percent, '%'))

Yes, similar to above 85%.

3.2 Regression models

3.2.1 What random structure to use?

An important point to clarify is what should be the random effects structure of our regression models.

On the one hand, it is usually recommended that one should include the full random structure, which in our case would mean not just the three “grouping factors” Language, Family and (Autotyp) Area, but also the random slopes for all the fixed effects (and their interactions) included in the model. So, for example, in a model with Order, r/l distinction (in L1) and their interaction as fixed effects, we should have the following fixed and random effects structure:

Match ~ 1 + Order + r_l_distinction_L1 + Order:r_l_distinction_L1 +
  (1 + Order + r_l_distinction_L1 + Order:r_l_distinction_L1 | Language) +
  (1 + Order + r_l_distinction_L1 + Order:r_l_distinction_L1 | Family) +
  (1 + Order + r_l_distinction_L1 + Order:r_l_distinction_L1 | Autotyp_Area)
Autotyp areas. Reproduced from The AUTOTYP database GitHub repo under a CC-BY-4.0 license.
Autotyp areas. Reproduced from The AUTOTYP database GitHub repo under a CC-BY-4.0 license.

However, when it comes to the r/l distinction and the presence of [r] in the language (L1 or L2), it is clear that these variables do not, by definition, vary within Language (so there should be no random slope here), and, at least in our current data, they vary very little within the levels of the other two grouping factors as well (see below), raising the legitimate question whether we should model random slopes for these two variables at all.

Order: we know this varies within all levels of Language, Family and Area because of the experiment design, so Order should have varying slopes for all three.

Sex: likewise, sex varies by design so it should have random slopes for all three.

Age: same for age, so it should have random slopes for all three.

r/l distinction in L1:

table(web$r_l_distinction_L1, web$Language)
##    
##     AL AM CN DE DK EE EN ES FA FI FR GE GR HU IT JP KR PL PT RO RU SE TH TR ZU
##   0  0  0 46  0  0  0  0  0  0  0  0  0  0  0  0 55 22  0  0  0  0  0  0  0 20
##   1 10 20  0 85 18 43 39 36 21 18 57 15 42 34 52  0  0 53 61 31 47 21 20 37  0
table(web$r_l_distinction_L1, web$Family)
##    
##     Benue-Congo Finno-Ugric  IE Japanese Kartvelian Korean Sino-Tibetan
##   0          20           0   0       55          0     22           46
##   1           0          95 593        0         15      0            0
##    
##     Tai-Kadai Turkic
##   0         0      0
##   1        20     37
#web %>% group_by(Family) %>% select(Family, Name) %>% unique() %>% arrange(Family)
table(web$r_l_distinction_L1, web$Autotyp_Area)
##    
##     Europe Greater Mesopotamia Inner Asia N Coast Asia S Africa Southeast Asia
##   0      0                   0          0           77       20             46
##   1    539                  93        108            0        0             20
#web %>% group_by(Autotyp_Area) %>% select(Autotyp_Area, Name) %>% unique() %>% arrange(Autotyp_Area)

This is perfectly uniform within the levels of each of the three factors, so random slopes are arguably not justified a priori for any of them.

r/l distinction in L2:

table(web$r_l_distinction_L2, web$Language)
##    
##     AL AM CN DE DK EE EN ES FA FI FR GE GR HU IT JP KR PL PT RO RU SE TH TR ZU
##   0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0
##   1  9 20 30 84 16 43 28 34 18 18 46 15 42 26 50 28 21 52 42 30 43 19 18 29 18
table(web$r_l_distinction_L2, web$Family)
##    
##     Benue-Congo Finno-Ugric  IE Japanese Kartvelian Korean Sino-Tibetan
##   0           0           0   1        1          0      0            0
##   1          18          87 533       28         15     21           30
##    
##     Tai-Kadai Turkic
##   0         0      0
##   1        18     29
table(web$r_l_distinction_L2, web$Autotyp_Area)
##    
##     Europe Greater Mesopotamia Inner Asia N Coast Asia S Africa Southeast Asia
##   0      1                   0          0            1        0              0
##   1    478                  82        104           49       18             48

There is almost no “absent” at all, so random slopes are arguably not justified a priori for any of them.

presence of [r] in L1:

table(web$trill_real_L1, web$Language)
##    
##     AL AM CN DE DK EE EN ES FA FI FR GE GR HU IT JP KR PL PT RO RU SE TH TR ZU
##   0  0  0 46 82 18  0 38  0  0  0 56 14 42  0  0 55 22  0 61  0  0 20 20 37 20
##   1 10 20  0  3  0 43  1 36 21 18  1  1  0 34 52  0  0 53  0 31 47  1  0  0  0
table(web$trill_real_L1, web$Family)
##    
##     Benue-Congo Finno-Ugric  IE Japanese Kartvelian Korean Sino-Tibetan
##   0          20           0 317       55         14     22           46
##   1           0          95 276        0          1      0            0
##    
##     Tai-Kadai Turkic
##   0        20     37
##   1         0      0
table(web$trill_real_L1, web$Autotyp_Area)
##    
##     Europe Greater Mesopotamia Inner Asia N Coast Asia S Africa Southeast Asia
##   0    317                  51          0           77       20             66
##   1    222                  42        108            0        0              0

This is almost uniform within Language, but there is variation for the IE level of Family (almost 50%:50% between “no” and “yes”), and between 2 levels of Area (Europe with 60%:40% and Greater Mesopotamia with 55%:45% “no”:“yes”), making the decision here much harder.

presence of [r] in L2:

table(web$trill_real_L2, web$Language)
##    
##     AL AM CN DE DK EE EN ES FA FI FR GE GR HU IT JP KR PL PT RO RU SE TH TR ZU
##   0  5  1 29 38 13  5 16 22 16  8 28  3 28 18 30 26 21 34 19 21 32 11 18 28 16
##   1  4 19  1 46  3 38 13 12  2 10 18 12 14  8 20  3  0 18 23  9 11  8  0  1  2
table(web$trill_real_L2, web$Family)
##    
##     Benue-Congo Finno-Ugric  IE Japanese Kartvelian Korean Sino-Tibetan
##   0          16          31 314       26          3     21           29
##   1           2          56 220        3         12      0            1
##    
##     Tai-Kadai Turkic
##   0        18     28
##   1         0      1
table(web$trill_real_L2, web$Autotyp_Area)
##    
##     Europe Greater Mesopotamia Inner Asia N Coast Asia S Africa Southeast Asia
##   0    283                  48         45           47       16             47
##   1    196                  34         59            3        2              1

Here there is enough variation within the levels for all three factors, justifying the inclusion of random slopes.

Given these, we did the following:

  • we always include random slopes for Order, Sex and Age
  • we do not model random slopes for the r/l distinction in L1 or L2
  • for the presence of [r] in L2, we do model random slopes, but for L1 we fit two models: (a) one with random slopes for Family and Area, and (b) a model without any random intercepts (and we perform model comparison between the two).

However, we also performed, for all these predictors, model comparisons between the model with the full random effects structure (as appropriate for each predictor, see above) and a model with a simpler random effects structure, as detailed below.

Please note that in the frequentist models (using glmer) we could not fit the full random effects structure (due to various convergence issues), but we report which structure was used in each case.

# Make Order a factor with r_first as the reference (baseline) level:
web <- mutate(web, Order = factor(Order, levels = c('r_first', 'l_first')))

# Check how balanced each candidate predictor is before modelling:
count(web, Order) %>% mutate(prop = n / sum(n))
# the order effect is decently balanced: 51.6% vs 48.3%

count(web, r_l_distinction_L1) %>% mutate(prop = n / sum(n))
# highly imbalanced: 15.8% vs 84.2%

count(web, trill_real_L1) %>% mutate(prop = n / sum(n))
# ok: 58.8% vs 41.2%

count(web, trill_occ_L1) %>% mutate(prop = n / sum(n))
# 100% are 1 --> excluded from the model

## And for L2, just in case
count(web, r_l_distinction_L2) %>% mutate(prop = n / sum(n))
# 13.5% missing, the rest almost all (86.3%) are 1 and 0.2% are 0 ---> exclude as well

count(web, trill_real_L2) %>% mutate(prop = n / sum(n))
# 13.5% missing, the rest is balanced: 53.8% vs 32.7%

count(web, trill_occ_L2) %>% mutate(prop = n / sum(n))
# 13.5% missing, the rest are all (86.5%) are 1 ---> exclude as well

#web <- mutate(web,
#               order_num = ifelse(Order == 'r_first', -0.5, +0.5))

# Recode the 0/1 indicators as two-level factors
# (0 -> first label, 1 -> second label; NAs are preserved):
as_binary_factor <- function(x, labels) factor(labels[x + 1], levels = labels)
web$r_l_distinction_L1_f <- as_binary_factor(web$r_l_distinction_L1, c("same", "distinct"))
web$trill_real_L1_f      <- as_binary_factor(web$trill_real_L1,      c("no", "yes"))
web$trill_occ_L1_f       <- as_binary_factor(web$trill_occ_L1,       c("no", "yes"))
web$r_l_distinction_L2_f <- as_binary_factor(web$r_l_distinction_L2, c("same", "distinct"))
web$trill_real_L2_f      <- as_binary_factor(web$trill_real_L2,      c("no", "yes"))
web$trill_occ_L2_f       <- as_binary_factor(web$trill_occ_L2,       c("no", "yes"))

(Please note that we hide the code for the model fitting as it is too large and clutters the output, but it can be consulted in the Rmarkdown file.)

3.2.2 The probability of a perfect match in the absence of any predictors

First, we determined the probability of a perfect match (i.e., both responses are correct) without any predictors (aka the null model).

For clarity, in the null model (i.e., with intercept only as fixed effect and varying intercepts only for the included random effects), we are interested in the fixed effect of the intercept, which represents the probability of a match (while controlling for the random effects structure).

3.2.2.1 Frequentist

We could only model a varying intercept by Language:

print(web_regressions_L1_summaries$glmer$null$summary);
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: Match ~ 1 + (1 | Language)
##    Data: web
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e+07))
## 
##      AIC      BIC   logLik deviance df.resid 
##    680.4    690.0   -338.2    676.4      901 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9660  0.2730  0.3432  0.4057  0.5672 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Language (Intercept) 0.3111   0.5578  
## Number of obs: 903, groups:  Language, 25
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   2.0065     0.1599   12.55   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The intercept is clearly and significantly > 0 (p=4.05×10-36) and translates into a probability of a match p(match)=88.1% 95%CI [84.5%, 91.1%] ≫ 50%.

The ICC of match estimated from the null model is 8.6%.

3.2.2.2 Bayesian

Please note that we used the maximal random effects structure (i.e., varying intercepts for all three factors):

cat(paste0(web_regressions_L1_summaries$brms$null$summary, collapse="\n"));
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: Match ~ 1 + (1 | Language) + (1 | Family) + (1 | Autotyp_Area) 
##    Data: web (Number of observations: 903) 
##   Draws: 4 chains, each with iter = 10000; warmup = 4000; thin = 2;
##          total post-warmup draws = 12000
## 
## Group-Level Effects: 
## ~Autotyp_Area (Number of levels: 6) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.42      0.38     0.02     1.37 1.00     6383     8921
## 
## ~Family (Number of levels: 9) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.35      0.30     0.01     1.10 1.00     7063     8906
## 
## ~Language (Number of levels: 25) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.59      0.20     0.24     1.02 1.00     6224     5212
## 
## Population-Level Effects: 
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     1.90      0.35     1.18     2.57 1.00     9500     9082
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

The intercept is clearly and significantly > 0 (% inside ROPE = 0.0%) and translates into a probability of a match p(match)=87.0% 95%HDI [76.4%, 92.8%] ≫ 50%.

The prior predictive checks support the choice of priors:
Prior predictive checks. The observed values (y) should fall within the distribution of the simulated priori distribution (yrep) for the minimum, mean and maximum values.
Prior predictive checks. The observed values (y) should fall within the distribution of the simulated priori distribution (yrep) for the minimum, mean and maximum values.
The model converges well:
Traces of the Bayesian fitting processes.
Traces of the Bayesian fitting processes.
and the results show a positive effect (but the 95%HDI does include 0):
Posterior estimates (medians) with 50% and 90% posterior probability intervals.
Posterior estimates (medians) with 50% and 90% posterior probability intervals.
The posterior predictive checks seem ok:
Posterior predictive checks: density overlays of the observed (y) and simulated (yrep) data.
Posterior predictive checks: density overlays of the observed (y) and simulated (yrep) data.
Posterior predictive checks: minimum, mean and maximum values of the observed (y) and simulated (yrep) data.
Posterior predictive checks: minimum, mean and maximum values of the observed (y) and simulated (yrep) data.

3.2.3 The potential predictors individually

Then, we checked if any of the potential predictors adds anything to the null model. (Please note that we show only the relevant information to keep this document simple.)

In the Bayesian approach, we fitted the maximal random effects structure appropriate for each predictor, but also a simplified one (see above), but we had to drastically simplify it for the frequentist models to achieve convergence (as indicated in each case).

We show the intercepts and regression slopes both as log odds ratios and as probabilities, as appropriate. We use a tabular presentation combining the frequentist (“ML”) and Bayesian (“B”) approaches and showing, as appropriate, the estimate with its 95%CI or 95%HDI, the p-value or the proportion inside the ROPE and the p(=0) of the Bayesian formal hypothesis testing, and the model comparison versus the null as the χ2 test and ΔAIC(model - null) or the Bayes factor (BF), ΔLOO, ΔWAIC and ΔKFOLD (with the standard deviation).

3.2.3.1 Order

Random effects are:

  • frequentist (ML): (1 | Language)
  • Bayesian full (Bfre): (1 + Order | Language) + (1 + Order | Family) + (1 + Order | Autotyp_Area)
  • Bayesian simplified (Bsre): (1 + Order | Language)
variable method log odds ratio (LOR) probability (%) p model comparison
intercept (α) ML 2.53 [2.11, 2.95] 92.6% [89.2%, 95.0%] 3.81×10-32
Bfre 2.47 [1.77, 3.18] 92.2% [85.4%, 96.0%] 0.0%
Bsre 2.45 [2.08, 2.82] 92.0% [88.8%, 94.4%] 0.0%
order (βl_first-r_first) ML -0.92 [-1.34, -0.50] 28.6% [20.8%, 37.8%] 1.87×10-5 VS null: χ2(1)=19.06, p=1.27×10-5, ΔAIC=-17.1
order (βl_first-r_first) Bfre -0.92 [-2.06, 0.25] 28.4% [11.3%, 56.3%] pROPE=0.1%, p(β=0)=0.51 VS null: BF: extreme evidence for [+] against [0] (BF=0.00028), ΔLOO([+] >> [0])=14.2 (5.9), ΔWAIC([+] >> [0])=14.4 (5.9), ΔKFOLD([+] >> [0])=14.3 (5.8)
order (βl_first-r_first) Bsre -0.66 [-1.31, -0.02] 34.2% [21.3%, 49.5%] pROPE=0.0%, p(β=0)=0.53 VS null: BF: extreme evidence for [+] against [0] (BF=5.87e-07), ΔLOO([+] >> [0])=16.2 (5.7), ΔWAIC([+] >> [0])=16.3 (5.7), ΔKFOLD([+] >> [0])=13.8 (5.8)
VS Bfre: BF: extreme evidence for [sre] against [fre] (BF=0.00202), ΔLOO([sre] ≈ [fre])=2.0 (1.0), ΔWAIC([sre] ≈ [fre])=1.9 (1.0), ΔKFOLD([fre] ≈ [sre])=0.5 (2.1)

Thus, it is clear that Order has a significant effect, with “l first” lowering the probability of a match by ~30%, thus to ~60% from the ~90% of “r first” (still better than 50%). As expected, the simplified random effects structure is comparable to the full one.

3.2.3.2 Sex

Random effects are:

  • frequentist (ML): (1 | Language)
  • Bayesian full (Bfre): (1 + Sex | Language) + (1 + Sex | Family) + (1 + Sex | Autotyp_Area)
  • Bayesian simplified (Bsre): (1 + Sex | Language)
variable method log odds ratio (LOR) probability (%) p model comparison
intercept (α) ML 2.03 [1.70, 2.37] 88.4% [84.5%, 91.5%] 7.34×10-32
Bfre 1.93 [1.21, 2.65] 87.3% [77.0%, 93.4%] 0.0%
Bsre 2.03 [1.69, 2.40] 88.4% [84.4%, 91.7%] 0.0%
sex (βM-F) ML -0.11 [-0.57, 0.36] 47.3% [36.1%, 58.9%] 0.652 VS null: χ2(1)=0.20, p=0.657, ΔAIC=1.8
sex (βM-F) Bfre 0.07 [-1.09, 1.26] 51.6% [25.2%, 78.0%] pROPE=0.3%, p(β=0)=0.84 VS null: BF: extreme evidence for [0] against [+] (BF=499), ΔLOO([0] ≈ [+])=3.4 (1.8), ΔWAIC([0] ≈ [+])=3.0 (1.7), ΔKFOLD([0] >> [+])=6.9 (2.9)
sex (βM-F) Bsre -0.00 [-0.59, 0.59] 50.0% [35.6%, 64.4%] pROPE=0.5%, p(β=0)=0.90 VS null: BF: anecdotal evidence for [0] against [+] (BF=1.26), ΔLOO([0] ≈ [+])=0.7 (1.2), ΔWAIC([0] ≈ [+])=0.6 (1.2), ΔKFOLD([0] ≈ [+])=0.3 (1.9)
VS Bfre: BF: extreme evidence for [sre] against [fre] (BF=0.00267), ΔLOO([sre] ≈ [fre])=2.7 (1.5), ΔWAIC([sre] ≈ [fre])=2.4 (1.4), ΔKFOLD([sre] >> [fre])=6.5 (2.8)

Thus, Sex has no effect on the probability of a match, and the simplified random effects structure might be better than the full one but their fixed effect estimates are very similar.

3.2.3.3 Age

Random effects are:

  • frequentist (ML): (1 + Age | Language)
  • Bayesian full (Bfre): (1 + Age | Language) + (1 + Age | Family) + (1 + Age | Autotyp_Area)
  • Bayesian simplified (Bsre): (1 + Age | Language)
variable method log odds ratio (LOR) probability (%) p model comparison
intercept (α) ML 2.11 [1.27, 2.95] 89.2% [78.0%, 95.0%] 9.23×10-7
Bfre 1.72 [0.12, 3.25] 84.8% [53.0%, 96.3%] 0.0%
Bsre 1.98 [1.23, 2.78] 87.8% [77.4%, 94.2%] 0.0%
age (β) ML -0.00 [-0.02, 0.02] 50.0% [49.5%, 50.5%] 0.944 VS null: χ2(3)=4.02, p=0.26, ΔAIC=2.0
age (β) Bfre 0.01 [-0.04, 0.05] 50.2% [49.0%, 51.2%] pROPE=1.0%, p(β=0)=0.99 VS null: BF: extreme evidence for [0] against [+] (BF=5.76e+08), ΔLOO([0] ≈ [+])=0.3 (1.9), ΔWAIC([0] ≈ [+])=0.1 (1.9), ΔKFOLD([0] ≈ [+])=1.3 (2.6)
age (β) Bsre 0.00 [-0.02, 0.02] 50.1% [49.5%, 50.6%] pROPE=1.0%, p(β=0)=1.00 VS null: BF: extreme evidence for [0] against [+] (BF=1.24e+03), ΔLOO([+] ≈ [0])=0.1 (1.1), ΔWAIC([+] ≈ [0])=0.2 (1.1), ΔKFOLD([+] ≈ [0])=0.2 (1.9)
VS Bfre: BF: extreme evidence for [sre] against [fre] (BF=1.84e-06), ΔLOO([sre] ≈ [fre])=0.4 (1.5), ΔWAIC([sre] ≈ [fre])=0.3 (1.5), ΔKFOLD([sre] ≈ [fre])=1.5 (2.0)

Thus, Age has no effect on the probability of a match, and the simplified random effects structure is similar to the full one.

3.2.3.4 R/L distinction in L1?

Random effects are:

  • frequentist (ML): (1 | Language)
  • Bayesian full (Bfre): (1 | Language) + (1 | Family) + (1 | Autotyp_Area)
  • Bayesian simplified (Bsre): (1 | Language)
variable method log odds ratio (LOR) probability (%) p model comparison
intercept (α) ML 1.78 [1.05, 2.52] 85.6% [74.1%, 92.5%] 1.87×10-6
Bfre 1.79 [0.69, 3.01] 85.7% [66.6%, 95.3%] 0.0%
Bsre 1.81 [1.02, 2.66] 85.9% [73.4%, 93.4%] 0.0%
r/l distinction (βdistinct-not) ML 0.27 [-0.53, 1.07] 56.6% [37.0%, 74.4%] 0.514 VS null: χ2(1)=0.41, p=0.522, ΔAIC=1.6
r/l distinction (βdistinct-not) Bfre 0.16 [-1.14, 1.55] 53.9% [24.2%, 82.5%] pROPE=0.2%, p(β=0)=0.79 VS null: BF: moderate evidence for [0] against [+] (BF=3.95), ΔLOO([0] ≈ [+])=0.2 (0.2), ΔWAIC([0] ≈ [+])=0.2 (0.2), ΔKFOLD([0] ≈ [+])=3.6 (2.1)
r/l distinction (βdistinct-not) Bsre 0.26 [-0.62, 1.18] 56.3% [34.9%, 76.4%] pROPE=0.3%, p(β=0)=0.83 VS null: BF: moderate evidence for [+] against [0] (BF=0.161), ΔLOO([+] ≈ [0])=0.5 (0.8), ΔWAIC([+] ≈ [0])=0.5 (0.8), ΔKFOLD([+] ≈ [0])=0.7 (1.6)
VS Bfre: BF: strong evidence for [sre] against [fre] (BF=0.0407), ΔLOO([sre] ≈ [fre])=0.8 (0.8), ΔWAIC([sre] ≈ [fre])=0.8 (0.8), ΔKFOLD([sre] >> [fre])=4.4 (2.0)

Thus, r/l distinction in L1 has no effect on the probability of a match, and the simplified random effects structure might be preferable to the full one.

3.2.3.5 [r] in L1?

Random effects are:

  • frequentist (ML): (1 | Language)
  • Bayesian full (Bfre): (1 + trill_real_L1_f | Language) + (1 + trill_real_L1_f | Family) + (1 + trill_real_L1_f | Autotyp_Area)
  • Bayesian simplified (Bsre): (1 + trill_real_L1_f | Language)
  • Bayesian simplified no random slope (Bnrs): (1 | Language)
variable method log odds ratio (LOR) probability (%) p model comparison
intercept (α) ML 2.19 [1.76, 2.63] 90.0% [85.3%, 93.3%] 6.64×10-23
Bfre 2.02 [1.06, 2.93] 88.3% [74.3%, 94.9%] 0.0%
Bsre 2.22 [1.73, 2.74] 90.2% [84.9%, 94.0%] 0.0%
Bnrs 2.22 [1.77, 2.73] 90.2% [85.4%, 93.9%] 0.0%
[r] (βyes-no) ML -0.40 [-1.02, 0.22] 40.0% [26.4%, 55.4%] 0.201 VS null: χ2(1)=1.69, p=0.194, ΔAIC=0.3
[r] (βyes-no) Bfre -0.84 [-3.46, 1.79] 30.1% [3.1%, 85.7%] VS null: BF: strong evidence for [0] against [+] (BF=16.4), ΔLOO([+] ≈ [0])=1.4 (1.8), ΔWAIC([+] ≈ [0])=1.5 (1.8), ΔKFOLD([0] ≈ [+])=0.5 (2.9)
[r] (βyes-no) Bsre -0.43 [-1.15, 0.30] 39.5% [24.0%, 57.5%] VS null: BF: anecdotal evidence for [+] against [0] (BF=0.493), ΔLOO([+] ≈ [0])=1.3 (1.4), ΔWAIC([+] ≈ [0])=1.3 (1.4), ΔKFOLD([+] ≈ [0])=0.7 (2.2)
VS Bfre: BF: very strong evidence for [sre] against [fre] (BF=0.0279), ΔLOO([fre] ≈ [sre])=0.1 (1.4), ΔWAIC([fre] ≈ [sre])=0.2 (1.4), ΔKFOLD([sre] ≈ [fre])=1.2 (2.5)
[r] (βyes-no) Bnrs -0.44 [-1.13, 0.26] 39.3% [24.5%, 56.5%] VS null: BF: moderate evidence for [+] against [0] (BF=0.118), ΔLOO([+] ≈ [0])=1.3 (1.4), ΔWAIC([+] ≈ [0])=1.3 (1.4), ΔKFOLD([+] ≈ [0])=3.2 (1.9)
VS Bfre: BF: extreme evidence for [nrs] against [fre] (BF=0.00696), ΔLOO([fre] ≈ [nrs])=0.1 (1.5), ΔWAIC([fre] ≈ [nrs])=0.2 (1.6), ΔKFOLD([nrs] ≈ [fre])=3.7 (2.6)
VS Bsre: BF: moderate evidence for [nrs] against [sre] (BF=0.239), ΔLOO([sre] ≈ [nrs])=0.0 (0.3), ΔWAIC([sre] ≈ [nrs])=0.1 (0.3), ΔKFOLD([nrs] ≈ [sre])=2.5 (1.6)

Thus, the presence of [r] in L1 has no effect on the probability of a match, and the simplified random effects structure (and even the no random slopes one) might be preferable to the full one.

3.2.3.6 R/L distinction in L2?

Random effects are:

  • frequentist (ML): (1 | Language)
  • Bayesian full (Bfre): (1 | Language) + (1 | Family) + (1 | Autotyp_Area)
  • Bayesian simplified (Bsre): (1 | Language)
variable method log odds ratio (LOR) probability (%) p model comparison
intercept (α) ML 15.10 [-74.30, 104.49] 100.0% [0.0%, 100.0%] 0.741
Bfre 3.25 [-1.14, 8.07] 96.3% [24.2%, 100.0%] 0.0%
Bsre 3.45 [-0.96, 8.21] 96.9% [27.6%, 100.0%] 0.0%
r/l distinction in L2 (βdistinct-not) ML -12.98 [-102.38, 76.42] 0.0% [0.0%, 100.0%] 0.776 VS null: χ2(1)=0.30, p=0.581, ΔAIC=1.7
r/l distinction in L2 (βdistinct-not) Bfre -1.25 [-6.13, 2.91] 22.2% [0.2%, 94.8%] VS null: BF: anecdotal evidence for [0] against [+] (BF=1.16), ΔLOO([0] ≈ [+])=0.1 (0.1), ΔWAIC([0] ≈ [+])=0.1 (0.1), ΔKFOLD([+] ≈ [0])=0.3 (1.3)
r/l distinction in L2 (βdistinct-not) Bsre -1.32 [-6.08, 3.06] 21.0% [0.2%, 95.5%] VS null: BF: moderate evidence for [+] against [0] (BF=0.116), ΔLOO([+] ≈ [0])=0.4 (1.3), ΔWAIC([+] ≈ [0])=0.4 (1.3), ΔKFOLD([0] ≈ [+])=0.8 (1.8)
VS Bfre: BF: strong evidence for [sre] against [fre] (BF=0.0929), ΔLOO([sre] ≈ [fre])=0.5 (1.3), ΔWAIC([sre] ≈ [fre])=0.5 (1.3), ΔKFOLD([fre] ≈ [sre])=1.1 (1.7)

Thus, r/l distinction in L2 has no effect on the probability of a match, and the simplified random effects structure might be preferable to the full one.

3.2.3.7 [r] in L2?

Random effects are:

  • frequentist (ML): (1 + trill_real_L2_f | Language)
  • Bayesian full (Bfre): (1 + trill_real_L2_f | Language) + (1 + trill_real_L2_f | Family) + (1 + trill_real_L2_f | Autotyp_Area)
  • Bayesian simplified (Bsre): (1 + trill_real_L2_f | Language)
  • Bayesian simplified no random slope (Bnrs): (1 | Language)
variable method log odds ratio (LOR) probability (%) p model comparison
intercept (α) ML 2.18 [1.74, 2.62] 89.9% [85.1%, 93.2%] 3.01×10-22
Bfre 2.06 [1.15, 3.06] 88.7% [76.0%, 95.5%] 0.0%
Bsre 2.16 [1.74, 2.62] 89.7% [85.1%, 93.2%] 0.0%
Bnrs 2.13 [1.73, 2.56] 89.3% [84.9%, 92.8%] 0.0%
[r] in L2 (βyes-no) ML -0.01 [-0.65, 0.64] 49.8% [34.3%, 65.4%] 0.985 VS null: χ2(3)=1.18, p=0.759, ΔAIC=4.8
[r] in L2 (βyes-no) Bfre 0.33 [-1.38, 2.30] 58.1% [20.0%, 90.9%] VS null: BF: extreme evidence for [0] against [+] (BF=133), ΔLOO([0] ≈ [+])=1.3 (1.8), ΔWAIC([0] ≈ [+])=1.0 (1.7), ΔKFOLD([0] ≈ [+])=4.0 (2.7)
[r] in L2 (βyes-no) Bsre 0.08 [-0.63, 0.72] 52.0% [34.8%, 67.3%] VS null: BF: anecdotal evidence for [0] against [+] (BF=2.18), ΔLOO([0] ≈ [+])=0.6 (1.7), ΔWAIC([0] ≈ [+])=0.5 (1.7), ΔKFOLD([0] ≈ [+])=2.5 (2.3)
VS Bfre: BF: very strong evidence for [sre] against [fre] (BF=0.0155), ΔLOO([sre] ≈ [fre])=0.7 (1.5), ΔWAIC([sre] ≈ [fre])=0.5 (1.5), ΔKFOLD([sre] ≈ [fre])=1.5 (2.3)
[r] in L2 (βyes-no) Bnrs 0.02 [-0.49, 0.52] 50.5% [38.1%, 62.8%] VS null: BF: anecdotal evidence for [+] against [0] (BF=0.919), ΔLOO([0] ≈ [+])=0.4 (1.2), ΔWAIC([0] ≈ [+])=0.4 (1.2), ΔKFOLD([0] ≈ [+])=1.9 (1.9)
VS Bfre: BF: extreme evidence for [nrs] against [fre] (BF=0.00587), ΔLOO([nrs] ≈ [fre])=0.9 (2.3), ΔWAIC([nrs] ≈ [fre])=0.6 (2.2), ΔKFOLD([nrs] ≈ [fre])=2.1 (2.8)
VS Bsre: BF: anecdotal evidence for [nrs] against [sre] (BF=0.422), ΔLOO([nrs] ≈ [sre])=0.3 (1.3), ΔWAIC([nrs] ≈ [sre])=0.1 (1.3), ΔKFOLD([nrs] ≈ [sre])=0.6 (1.9)

Thus, the presence of [r] in L2 has no effect on the probability of a match, and the simplified random effects structure (and even the no random slopes one) might be preferable to the full one.

3.2.4 Starting from a model with all predictors

As a test, we started from a model with all the predictors for L1 (but no interactions – a separate test (not shown here) clearly shows they do not matter) – and we manually simplified it. As expected, Sex, Age and r/l distinction do not contribute at all. Interestingly, however, the model including only Order and the presence of [r] suggests that the latter might make a significant contribution while the former doesn’t:

variable log odds ratio (LOR) probability (%) p
intercept (α) 2.79 [1.92, 3.59] 94.2% [87.2%, 97.3%] 0.0%
order (βl_first-r_first) -0.99 [-2.29, 0.15] 27.1% [9.2%, 53.7%] pROPE=0.1%, p(β=0)=0.51
[r] (βyes-no) -0.96 [-1.71, -0.25] 27.6% [15.3%, 43.8%] pROPE=0.0%, p(β=0)=0.18

VS null: BF: extreme evidence for [order + [r]] against [0] (BF=5.22e-05), ΔLOO([order + [r]] >> [0])=17.7 (6.1), ΔWAIC([order + [r]] >> [0])=18.0 (6.1), ΔKFOLD([order + [r]] >> [0])=15.7 (6.5)

VS full: BF: extreme evidence for [order + [r]] against [full] (BF=7.38e-13), ΔLOO([order + [r]] >> [full])=6.4 (2.3), ΔWAIC([order + [r]] >> [full])=5.6 (2.3), ΔKFOLD([order + [r]] ≈ [full])=2.6 (3.3)

However, removing Order shows that this is not the case:

variable log odds ratio (LOR) probability (%) p
intercept (α) 2.10 [1.21, 2.96] 89.1% [77.1%, 95.1%] 0.0%
[r] (βyes-no) -0.76 [-1.56, 0.01] 31.8% [17.4%, 50.3%] pROPE=0.0%, p(β=0)=0.50

VS null: BF: anecdotal evidence for [[r]] against [0] (BF=0.951), ΔLOO([[r]] ≈ [0])=1.3 (1.5), ΔWAIC([[r]] ≈ [0])=1.3 (1.5), ΔKFOLD([[r]] ≈ [0])=0.4 (2.5)

VS full: BF: extreme evidence for [[r]] against [full] (BF=2.15e-08), ΔLOO([full] > [[r]])=10.0 (6.7), ΔWAIC([full] > [[r]])=11.1 (6.6), ΔKFOLD([full] > [[r]])=12.8 (6.9)

VS full: BF: extreme evidence for [order + [r]] against [[r]] (BF=5.87e-05), ΔLOO([order + [r]] >> [[r]])=16.4 (6.1), ΔWAIC([order + [r]] >> [[r]])=16.6 (6.1), ΔKFOLD([order + [r]] >> [[r]])=15.3 (6.5)

Please note that this could be due to the use of all three random effects (Language, Family and Area) here.

Nevertheless, even if we were to accept that the presence of [r] in L1 would affect the probability of a match, the presence of this sound in L1 would reduce this probability from ~95% to ~65%.

3.2.5 Further exploration of the relationship between Order and [r] in L1

Fitting multiple models featuring Order and the presence of [r] in L1, leads to the following findings:

random effects structure Order (l_first - r_first) trill (yes - no) intercept
(1 + Order | Language) + (1 + Order | Family) -0.96 [-2.07, -0.01] p=0.44 * -0.94 [-1.66, -0.20] p=0.19 * 2.76 [ 2.05, 3.49]
(1 + Order | Language) + (1 | Family) -0.64 [-1.28, 0.03] p=0.54 -0.91 [-1.66, -0.18] p=0.25 * 2.67 [ 1.96, 3.41]
(1 | Language) + (1 + Order | Family) -1.24 [-2.11, -0.40] p=0.08 * -0.78 [-1.54, -0.03] p=0.44 * 2.76 [ 2.04, 3.56]
(1 | Language) + (1 | Family) -0.92 [-1.35, -0.50] p=0.00 * -0.72 [-1.51, 0.08] p=0.57 2.58 [ 1.80, 3.41]
-0.87 [-1.29, -0.46] p=0.00 * -0.22 [-0.62, 0.18] p=0.88 2.52 [ 2.16, 2.90]

which shows that:

  • Order seems to have a negative effect in all situations (even in case (2) the 95%HDI is mostly at the left of 0)
  • [r], on the other hand, seems to have a negative effect if one allows Order to have a random slope, but even if not, its 95%HDI is mostly to the left of 0; the only clear case where there is no effect is when there’s no random structure

Plotting the distribution of the actual data:

Distribution of *matches* in the web data (i.e., not modelled but the actual raw data) by *Order* (columns) and the *presence of [r] in the L1* (rows).

Distribution of matches in the web data (i.e., not modelled but the actual raw data) by Order (columns) and the presence of [r] in the L1 (rows).

This suggests that for the languages with [r], there is no real effect of Order, but in the languages without [r], presenting [r] first has a massive positive effect on the match.

The model (1 + Order | Language) + (1 + Order | Family) seems to support this:

Model predictions for the model with (1 + Order | Language) + (1 + Order | Family).
Model predictions for the model with (1 + Order | Language) + (1 + Order | Family).
The distribution of the random effects (intercept and slope of Order) by Language for the model with (1 + Order | Language) + (1 + Order | Family).
The distribution of the random effects (intercept and slope of Order) by Language for the model with (1 + Order | Language) + (1 + Order | Family).

Interestingly, the interaction between order and the presence of [r] is not supported when we include the random structure (e.g., for (1 + Order | Language) + (1 + Order | Family), 0.49 [-0.86, 1.83] p=0.75), but in the model without any random effects, the interaction is significantly different from 0 (0.93 [ 0.10, 1.72] p=0.36 *) and it also makes [r] significantly different from 0 (-0.81 [-1.91, -0.74] p=0.28 *), as suggested by the raw data.

Thus, in order for the negative effect of [r] to become visible, we need to control for Order appropriately (i.e., allowing it to have random slopes by Language).

3.2.6 Summary

In all cases the intercept α is highly significant and positive, comparable with the null model at ≥ 80%, so much higher than 50%.

On the other hand, of all the potential predictors only order is arguably adding something relevant to the null model, with “l first” decreasing the probability of a match by ≈30% (from ~90% to ~60%), still better than 50%.

There is a hint that the presence of [r] in L1 might have a negative effect, but this is far from clear.

For the full model (i.e., with all potential fixed effects and random effects, Match ~ 1 + Order + Sex + Age + r_l_distinction_L1_f + trill_real_L1_f + (1 + Order + Sex + Age | Language) + (1 + Order + Sex + Age | Family) + (1 + Order + Sex + Age | Autotyp_Area)):

Full model: the fixed effects showing the median, 66% and 95% quantiles of the posterior distribution.
Full model: the fixed effects showing the median, 66% and 95% quantiles of the posterior distribution.
Full model: the distribution of the proportion of matches per language.
Full model: the distribution of the proportion of matches per language.

4 Field experiment

4.1 Descriptive statistics

First, how many participants?

nrow(field)
## [1] 127

Sex division

table(field$Sex)
## 
##  f  m 
## 91 36

Ages

summary(field$Age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.00   19.00   20.00   28.55   34.50   75.00

First, how many languages?

field %>% count(Language) %>% nrow()
## [1] 6

Does this number correspond with the L1s?

field %>% count(Name) %>% nrow()
## [1] 6

How many families?

field %>% count(Family) %>% nrow()
## [1] 4

How many have the R/L distinction in the L1 among the languages?

field %>% count(Name, r_l_distinction_L1) %>% count(r_l_distinction_L1)

How many really use the alveolar trill in L1 among the languages?

field %>% count(Name, trill_real_L1) %>% count(trill_real_L1)

How many really have the alveolar trill in L1 as an allophone among the languages?

field %>% count(Name, trill_occ_L1) %>% count(trill_occ_L1)

What about the same questions for L2. But this will not neatly sum up to 6, due to various possible scenarios for L2 within a specific L1.

How many have the R/L distinction in the L2 among the languages?

field %>% count(Name, r_l_distinction_L2) %>% count(r_l_distinction_L2)

How many really use the alveolar trill in L2 among the languages?

field %>% count(Name, trill_real_L2) %>% count(trill_real_L2)

How many really have the alveolar trill in L2 as an allophone among the languages?

field %>% count(Name, trill_occ_L2) %>% count(trill_occ_L2)

What is the grand average congruent behavior?

mean(field$Match)
## [1] 0.976378

97%!!!

What about only among those who have L1 without the distinction?

# Mean match rate restricted to speakers whose L1 lacks the r/l distinction
# (subset inside the aggregate instead of a separate filter step):
field %>%
  summarize(mean_match = mean(Match[r_l_distinction_L1 == "0"], na.rm = TRUE))

100%! WOW.

What about only among those who have L1 without the distinction and no L2 that distinguishes?

# Mean match rate for speakers with: L1 without the distinction, no English
# as L2, and no L2 that distinguishes r/l (conditions combined in one filter):
field %>%
  filter(r_l_distinction_L1 == "0",
         !EnglishL2YesNo,
         r_l_distinction_L2 == '0') %>%
  summarize(mean_match = mean(Match, na.rm = TRUE))

There are no such people.

Compute average matching behavior per language and sort:

# Average matching behaviour per language, sorted descending, with a rounded
# percent label for reporting.
# NOTE: computed as round(100 * M) rather than round(M, 2) * 100 — the labels
# agree, but the original form leaves floating-point artifacts in the
# intermediate numeric (e.g. 0.95 * 100 is not exactly 95), which only print
# cleanly because as.character() truncates to 15 significant digits.
field_avg <- field %>%
  group_by(Language) %>%
  summarize(M = mean(Match)) %>%
  arrange(desc(M)) %>%
  mutate(percent = str_c(round(100 * M), '%'))

# Show:

field_avg %>% print(n = Inf)
## # A tibble: 6 × 3
##   Language     M percent
##   <chr>    <dbl> <chr>  
## 1 BR       1     100%   
## 2 PA       1     100%   
## 3 VA       1     100%   
## 4 BE       0.982 98%    
## 5 DE       0.947 95%    
## 6 SR       0.923 92%

Check some demographics, also to report in the paper. First, the number of participants per language:

field %>% 
  count(Name, sort = TRUE) %>% print(n = Inf)
## # A tibble: 6 × 2
##   Name                     n
##   <chr>                <int>
## 1 English UK              55
## 2 Tashlhiyt Berber        20
## 3 German                  19
## 4 Brazilian Portuguese    13
## 5 Daakie                  12
## 6 Palikur                  8

Then, the number of L1 speakers who have R/L distinction vs. who don’t:

# Speakers by L1 r/l distinction, with proportions and percent labels:
count(field, r_l_distinction_L1) %>%
  mutate(prop    = n / sum(n),
         percent = str_c(round(prop, 2) * 100, '%'))

How many people do not have any L2?

# Participants without any L2 (L2 is NA): raw counts, then proportions.
no_l2 <- is.na(field$L2)
sum(no_l2); nrow(field)
## [1] 52
## [1] 127
# proportion without an L2 (mean of the NA indicator = count / total)
mean(no_l2)
## [1] 0.4094488
# proportion with at least one L2
1 - mean(no_l2)
## [1] 0.5905512

Check how many people knew English as their L2:

# Share of participants reporting English among their L2s:
count(field, EnglishL2YesNo) %>%
  mutate(prop    = n / sum(n),
         percent = str_c(round(prop, 2) * 100, '%'))

Of those that don’t use a R/L distinction in their L1, how many use R/L distinction in their L2? (double-check if logic alright!)

# Among speakers whose L1 lacks the r/l distinction: does their L2 have it?
field %>%
  filter(r_l_distinction_L1 == '0') %>%
  count(r_l_distinction_L2 == '1') %>%
  mutate(prop    = n / sum(n),
         percent = str_c(round(prop, 2) * 100, '%'))

How many “pure” speakers were there?, i.e., those people that 1) don’t know English, 2) don’t use an L1 with a R/L distinction, and 3) don’t know an L2 that distinguishes R/L.

# "Pure" speakers: no r/l distinction in L1 (this excludes English speakers
# as well), no English as L2, and no L2 with the distinction.
field %>%
  filter(r_l_distinction_L1 == '0',
         !EnglishL2YesNo,
         r_l_distinction_L2 == '0') %>%
  nrow()
## [1] 0

None.

4.2 Regression models

Check the distribution of scripts across families to make decisions about random effects structure:

# Cross-tabulate the L1 r/l distinction against Family to gauge whether
# random slopes are justified (they need within-level variation):
table(field$Family, field$r_l_distinction_L1)
##               
##                 0  1
##   Afro-Asiatic  0 20
##   Arawakan      8  0
##   Austronesian  0 12
##   IE            0 87
count(field, r_l_distinction_L1) %>% mutate(prop = n / sum(n))
# highly imbalanced

count(field, trill_real_L1) %>% mutate(prop = n / sum(n))
count(field, trill_occ_L1) %>% mutate(prop = n / sum(n))
# highly imbalanced

## And for L2, just in case
count(field, r_l_distinction_L2) %>% mutate(prop = n / sum(n))
count(field, trill_real_L2) %>% mutate(prop = n / sum(n))
count(field, trill_occ_L2) %>% mutate(prop = n / sum(n))
# Recode the 0/1 indicators as two-level factors
# (0 -> first label, 1 -> second label; NAs are preserved):
as_binary_factor <- function(x, labels) factor(labels[x + 1], levels = labels)
field$r_l_distinction_L1_f <- as_binary_factor(field$r_l_distinction_L1, c("same", "distinct"))
field$trill_real_L1_f      <- as_binary_factor(field$trill_real_L1,      c("no", "yes"))
field$trill_occ_L1_f       <- as_binary_factor(field$trill_occ_L1,       c("no", "yes"))
field$r_l_distinction_L2_f <- as_binary_factor(field$r_l_distinction_L2, c("same", "distinct"))
field$trill_real_L2_f      <- as_binary_factor(field$trill_real_L2,      c("no", "yes"))
field$trill_occ_L2_f       <- as_binary_factor(field$trill_occ_L2,       c("no", "yes"))

4.2.1 What random structure to use?

Sex: likewise, sex varies by design so it should have random slopes for all three.

Age: same for age, so it should have random slopes for all three.

r/l distinction in L1:

# Cross-tabulate the L1 r/l distinction against Language, Family and Area:
# random slopes for this predictor are only justified if it varies WITHIN
# the levels of the grouping factor.
table(field$r_l_distinction_L1, field$Language)
##    
##     BE BR DE PA SR VA
##   0  0  0  0  8  0  0
##   1 55 20 19  0 13 12
table(field$r_l_distinction_L1, field$Family)
##    
##     Afro-Asiatic Arawakan Austronesian IE
##   0            0        8            0  0
##   1           20        0           12 87
#field %>% group_by(Family) %>% select(Family, Name) %>% unique() %>% arrange(Family)
table(field$r_l_distinction_L1, field$Autotyp_Area)
##    
##     Europe N Africa NE South America Oceania
##   0      0        0                8       0
##   1     87       20                0      12
#field %>% group_by(Autotyp_Area) %>% select(Autotyp_Area, Name) %>% unique() %>% arrange(Autotyp_Area)

This is perfectly uniform within the levels of each of the three factors, so random slopes are arguably not justified a priori for any of them.

r/l distinction in L2:

## Cross-tabulate the L2 r/l distinction against language, family and area.
table(field[["r_l_distinction_L2"]], field[["Language"]])
##    
##     BE BR DE PA SR VA
##   0  1  0  0  0  0  0
##   1 18 20 14  8  1 12
table(field[["r_l_distinction_L2"]], field[["Family"]])
##    
##     Afro-Asiatic Arawakan Austronesian IE
##   0            0        0            0  1
##   1           20        8           12 33
table(field[["r_l_distinction_L2"]], field[["Autotyp_Area"]])
##    
##     Europe N Africa NE South America Oceania
##   0      1        0                0       0
##   1     33       20                8      12

There are almost no “absent” cases at all, so random slopes are arguably not justified a priori for any of them.

presence of [r] in L1:

## Cross-tabulate the presence of [r] in the L1 against language, family and area.
table(field[["trill_real_L1"]], field[["Language"]])
##    
##     BE BR DE PA SR VA
##   0 55  0 19  8 13  0
##   1  0 20  0  0  0 12
table(field[["trill_real_L1"]], field[["Family"]])
##    
##     Afro-Asiatic Arawakan Austronesian IE
##   0            0        8            0 87
##   1           20        0           12  0
table(field[["trill_real_L1"]], field[["Autotyp_Area"]])
##    
##     Europe N Africa NE South America Oceania
##   0     87        0                8       0
##   1      0       20                0      12

There is no variation here, so no random slopes either.

presence of [r] in L2:

## Cross-tabulate the presence of [r] in the L2 against language, family and area.
table(field[["trill_real_L2"]], field[["Language"]])
##    
##     BE BR DE PA SR VA
##   0  9  0 10  8  1  0
##   1 10 20  4  0  0 12
table(field[["trill_real_L2"]], field[["Family"]])
##    
##     Afro-Asiatic Arawakan Austronesian IE
##   0            0        8            0 20
##   1           20        0           12 14
table(field[["trill_real_L2"]], field[["Autotyp_Area"]])
##    
##     Europe N Africa NE South America Oceania
##   0     20        0                8       0
##   1     14       20                0      12

There is very little within-group variation here (only BE and DE show both values in the tables above), so we do not include random slopes either.

Given these, we did the following:

  • we always include random slopes for Sex and Age
  • we do not model random slopes for the r/l distinction nor for the presence of [r] in L1 or L2

(Please note that we hide the code for the model fitting as it is too large and clutters the output, but it can be consulted in the Rmarkdown file.)

Please note that we could not fit frequentist models as the random effects structure does not converge.

4.2.2 The probability of a perfect match in the absence of any predictors

First, we determined the probability of a perfect match (i.e., both responses are correct) without any predictors (aka the null model).

For clarity, the null model is:

Match ~ 1 + 
  (1 | Language)

and we are interested in the intercept, which represents the probability of a match.

## Print the stored brms summary of the null model, one element per line.
## (`sep = "\n"` produces the same output as collapsing with "\n" first.)
cat(field_regressions_summaries$brms$null$summary, sep = "\n")
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: Match ~ 1 + (1 | Language) 
##    Data: field (Number of observations: 127) 
##   Draws: 4 chains, each with iter = 10000; warmup = 4000; thin = 2;
##          total post-warmup draws = 12000
## 
## Group-Level Effects: 
## ~Language (Number of levels: 6) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.99      0.98     0.03     3.58 1.00     6502     7736
## 
## Population-Level Effects: 
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     3.83      0.88     2.32     5.77 1.00     7124     6903
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

The intercept is clearly and significantly > 0 (% inside ROPE = 0.0%) and translates into a probability of a match p(match)=97.9% 95%HDI [90.6%, 99.7%] ≫ 50%.

The model converges well:
Traces of the Bayesian fitting processes.
Traces of the Bayesian fitting processes.
and the results show a positive effect of rough (but the 95%HDI does include 0):
Posterior estimates (medians) with 50% and 90% posterior probability intervals.
Posterior estimates (medians) with 50% and 90% posterior probability intervals.
The posterior predictive checks seem ok but far from perfect:
Posterior predictive checks: density overlays of the observed (y) and simulated (yrep) data.
Posterior predictive checks: density overlays of the observed (y) and simulated (yrep) data.
Posterior predictive checks: minimum, mean and maximum values of the observed (y) and simulated (yrep) data.
Posterior predictive checks: minimum, mean and maximum values of the observed (y) and simulated (yrep) data.

4.2.3 The potential predictors individually

Please note that L2 is degenerate and the models do not make any sense.

4.2.3.1 Sex

variable method log odds ratio (LOR) probability (%) p model comparison
intercept (α) B 4.06 [2.12, 6.09] 98.3% [89.3%, 99.8%] 0.0%
sex (βM-F) B 0.42 [-2.62, 3.69] 60.4% [6.8%, 97.6%] pROPE=0.1%, p(β=0)=0.65 VS null: BF: anecdotal evidence for [0] against [+] (BF=2.32), ΔLOO([0] ≈ [+])=0.9 (0.5), ΔWAIC([0] ≈ [+])=0.7 (0.5), ΔKFOLD([0] ≈ [+])=1.0 (0.6)

4.2.3.2 Age

variable method log odds ratio (LOR) probability (%) p model comparison
intercept (α) B 6.47 [2.15, 11.64] 99.8% [89.5%, 100.0%] 0.0%
age (β) B -0.07 [-0.23, 0.09] 48.3% [44.2%, 52.2%] pROPE=1.0%, p(β=0)=0.96 VS null: BF: extreme evidence for [0] against [+] (BF=890), ΔLOO([0] ≈ [+])=1.0 (1.9), ΔWAIC([0] ≈ [+])=0.4 (1.9), ΔKFOLD([0] ≈ [+])=2.1 (1.9)

4.2.3.3 R/L distinction in L1?

variable method log odds ratio (LOR) probability (%) p model comparison
intercept (α) B 5.14 [0.94, 10.27] 99.4% [72.0%, 100.0%] 0.0%
r/l distinction (βdistinct-not) B -1.29 [-6.13, 3.07] 21.5% [0.2%, 95.6%] pROPE=0.1%, p(β=0)=0.56 VS null: BF: anecdotal evidence for [0] against [+] (BF=1.23), ΔLOO([0] ≈ [+])=0.1 (0.1), ΔWAIC([0] ≈ [+])=0.0 (0.0), ΔKFOLD([0] ≈ [+])=0.0 (0.1)

4.2.3.4 [r] in L1?

variable method log odds ratio (LOR) probability (%) p model comparison
intercept (α) B 3.63 [2.02, 5.52] 97.4% [88.2%, 99.6%] 0.0%
[r] (βyes-no) B 1.90 [-1.91, 6.04] 87.0% [12.9%, 99.8%] pROPE=0.1%, p(β=0)=0.52 VS null: BF: anecdotal evidence for [0] against [+] (BF=1.03), ΔLOO([+] ≈ [0])=0.4 (0.2), ΔWAIC([+] ≈ [0])=0.4 (0.2), ΔKFOLD([+] ≈ [0])=0.5 (0.3)

4.2.3.5 R/L distinction in L2?

variable method log odds ratio (LOR) probability (%) p model comparison
intercept (α) B 8.78 [-0.36, 20.66] 100.0% [41.1%, 100.0%] 0.0%
r/l distinction (βdistinct-not) B -0.19 [-6.38, 5.43] 45.3% [0.2%, 99.6%] pROPE=0.1%, p(β=0)=0.50 VS null: BF: anecdotal evidence for [0] against [+] (BF=1.04), ΔLOO([+] ≈ [0])=0.0 (0.0), ΔWAIC([0] ≈ [+])=0.0 (0.0), ΔKFOLD(? ? ?)=NA (NA)

4.2.3.6 [r] in L2?

variable method log odds ratio (LOR) probability (%) p model comparison
intercept (α) B 9.36 [1.80, 21.20] 100.0% [85.9%, 100.0%] 0.0%
[r] (βyes-no) B -0.16 [-5.46, 4.70] 46.1% [0.4%, 99.1%] pROPE=0.1%, p(β=0)=0.55 VS null: BF: anecdotal evidence for [0] against [+] (BF=1.2), ΔLOO([+] ≈ [0])=0.0 (0.0), ΔWAIC([+] ≈ [0])=0.0 (0.0), ΔKFOLD([+] ≈ [0])=0.0 (0.0)

4.2.4 Summary

In all cases the intercept α is highly significant and positive, translating (as in the null model) into a probability of a match ≥ 98%, so ≫ 50%. On the other hand, none of the potential predictors seem to make any difference.

5 Conclusions

It is clear that there is a very strong association between [r] and the rugged line and between [l] and the continuous line across the board. On the other hand, no predictor seems to really affect this, except for the order of presentation (“l” first decreases the association) and, when order is also included and its random structure properly modeled, the presence of [r] in the L1 — both effects found in the web experiment.